# load necessary libraries
library(GGally)
## Loading required package: ggplot2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr)
#load data relig_income present in tidyr package
data(relig_income)
head(relig_income)
## # A tibble: 6 × 11
## religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Don’t kn… 15 14 15 11 10 35 21
## 6 Evangeli… 575 869 1064 982 881 1486 949
## # ℹ 3 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## # `Don't know/refused` <dbl>
# Reshape the dataset from wide to long format
relig_income_long <- relig_income %>%
pivot_longer(cols = -religion, names_to = "Income_Range", values_to = "Count")
# Check the structure of the data
str(relig_income_long)
## tibble [180 × 3] (S3: tbl_df/tbl/data.frame)
## $ religion : chr [1:180] "Agnostic" "Agnostic" "Agnostic" "Agnostic" ...
## $ Income_Range: chr [1:180] "<$10k" "$10-20k" "$20-30k" "$30-40k" ...
## $ Count : num [1:180] 27 34 60 81 76 137 122 109 84 96 ...
#visualization on relig_income dataset of tidyr package
# Bar plot
ggplot(relig_income_long, aes(x = Income_Range, y = Count, fill = religion)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Income Distribution by Religion",
x = "Income Range", y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Heat map
ggplot(relig_income_long, aes(x = Income_Range, y = religion, fill = Count)) +
geom_tile() +
labs(title = "Heatmap of Income Distribution by Religion",
x = "Income Range", y = "Religion") +
theme_minimal() +
scale_fill_gradient(low = "white", high = "orange")

#interactive plots with plotly
p <- ggplot(relig_income, aes(x = `<$10k`, y = `$75-100k`, color = religion)) +
geom_point(size=3) +
labs(title = "Religion income dataset: <$10k vs $75-100k")
ggplotly(p)
# Bubble Chart
ggplot(relig_income_long, aes(x = Income_Range, y = religion, size = Count, color = religion)) +
geom_point(alpha = 0.7) +
scale_size(range = c(3, 20)) +
labs(title = "Income Distribution by Religious Affiliation",
x = "Income Level",
y = "Religion",
size = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Check for missing values
sum(is.na(relig_income))
## [1] 0
# Creating derived features
relig_income_enhanced <- relig_income %>%
mutate(
Total = rowSums(select(., -religion)),
Proportion_Low_Income = (`<$10k` + `$10-20k`) / Total,
Proportion_High_Income = (`$50-75k` + `$75-100k`) / Total
)
# Standardized measurements
relig_income_standardized <- relig_income %>%
mutate(across(where(is.numeric), scale))
# Binning continuous values
relig_income_binned <- relig_income %>%
mutate(
Total = rowSums(select(., -religion)),
Total_Bin = cut(Total, breaks = 5, labels = c("Very Low", "Low", "Medium", "High", "Very High")),
Proportion_Low_Income = (`<$10k` + `$10-20k`) / Total,
Proportion_Low_Income_Bin = cut(Proportion_Low_Income, breaks = 5,
labels = c("Very Low", "Low", "Medium", "High", "Very High"))
)
# Display the first few rows of each dataset
print("Enhanced Dataset:")
## [1] "Enhanced Dataset:"
print(head(relig_income_enhanced))
## # A tibble: 6 × 14
## religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Don’t kn… 15 14 15 11 10 35 21
## 6 Evangeli… 575 869 1064 982 881 1486 949
## # ℹ 6 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## # `Don't know/refused` <dbl>, Total <dbl>, Proportion_Low_Income <dbl>,
## # Proportion_High_Income <dbl>
print("Standardized Dataset:")
## [1] "Standardized Dataset:"
print(head(relig_income_standardized))
## # A tibble: 6 × 11
## religion `<$10k`[,1] `$10-20k`[,1] `$20-30k`[,1] `$30-40k`[,1] `$40-50k`[,1]
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic -0.475 -0.472 -0.408 -0.351 -0.352
## 2 Atheist -0.564 -0.500 -0.482 -0.451 -0.503
## 3 Buddhist -0.475 -0.523 -0.505 -0.513 -0.510
## 4 Catholic 1.84 1.81 1.76 1.67 1.72
## 5 Don’t kno… -0.546 -0.551 -0.553 -0.592 -0.595
## 6 Evangelic… 2.77 2.80 2.83 2.74 2.62
## # ℹ 5 more variables: `$50-75k` <dbl[,1]>, `$75-100k` <dbl[,1]>,
## # `$100-150k` <dbl[,1]>, `>150k` <dbl[,1]>, `Don't know/refused` <dbl[,1]>
print("Binned Dataset:")
## [1] "Binned Dataset:"
print(head(relig_income_binned))
## # A tibble: 6 × 15
## religion `<$10k` `$10-20k` `$20-30k` `$30-40k` `$40-50k` `$50-75k` `$75-100k`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 27 34 60 81 76 137 122
## 2 Atheist 12 27 37 52 35 70 73
## 3 Buddhist 27 21 30 34 33 58 62
## 4 Catholic 418 617 732 670 638 1116 949
## 5 Don’t kn… 15 14 15 11 10 35 21
## 6 Evangeli… 575 869 1064 982 881 1486 949
## # ℹ 7 more variables: `$100-150k` <dbl>, `>150k` <dbl>,
## # `Don't know/refused` <dbl>, Total <dbl>, Total_Bin <fct>,
## # Proportion_Low_Income <dbl>, Proportion_Low_Income_Bin <fct>
# Now we make interactive visualization and dashboard using R shiny
library(shinydashboard)
##
## Attaching package: 'shinydashboard'
##
## The following object is masked from 'package:graphics':
##
## box
library(shiny)
ui <- dashboardPage(
dashboardHeader(title = "Religion Income Explorer"),
dashboardSidebar(
selectInput("x_var", "X Variable:",
choices = unique(relig_income_long$Income_Range)),
selectInput("y_var", "Y Variable:",
choices = unique(relig_income_long$Income_Range),
selected = "$75-100k"),
checkboxGroupInput("religions", "Select Religions:",
choices = unique(relig_income_long$religion),
selected = unique(relig_income_long$religion)[1:5])
),
dashboardBody(
fluidRow(
box(plotlyOutput("scatter_plot"), width = 8),
box(plotOutput("bar_plot"), width = 4)
),
fluidRow(
box(dataTableOutput("data_table"), width = 12)
)
)
)
## `shiny::dataTableOutput()` is deprecated as of shiny 1.8.1.
## Please use `DT::DTOutput()` instead.
## See <https://rstudio.github.io/DT/shiny.html> for more information.
server <- function(input, output) {
filtered_data <- reactive({
relig_income_long %>%
filter(religion %in% input$religions)
})
output$scatter_plot <- renderPlotly({
plot_data <- filtered_data() %>%
pivot_wider(names_from = Income_Range, values_from = Count) %>%
select(religion, !!sym(input$x_var), !!sym(input$y_var))
p <- ggplot(plot_data, aes(x = !!sym(input$x_var), y = !!sym(input$y_var), color = religion, text = religion)) +
geom_point(size = 3) +
labs(title = paste(input$y_var, "vs", input$x_var),
x = input$x_var, y = input$y_var) +
theme_minimal()
ggplotly(p, tooltip = "text")
})
output$bar_plot <- renderPlot({
ggplot(filtered_data() %>% filter(Income_Range %in% c(input$x_var, input$y_var)),
aes(x = religion, y = Count, fill = Income_Range)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Selected Income Ranges",
x = "Religion", y = "Count") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
})
output$data_table <- renderDataTable({
filtered_data() %>%
pivot_wider(names_from = Income_Range, values_from = Count) %>%
arrange(desc(`$75-100k`))
})
}
shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents